"""
统计需要融合的疾病

3 个源里都出现的疾病  39_pingan_xywy.txt
39 和 平安 出现的疾病 39_pingan.txt
39 和 寻医问药 出现的疾病 39_xywy.txt
平安 和 寻医问药 出现的疾病 pingan_xywy.txt

39 自己  39.txt
pingan 自己 pingan.txt
xywy  自己  xywy.txt

"""
import json
import os


def get_disease(path):
    """Collect the ``title`` field of every JSON file directly inside *path*.

    Args:
        path: Directory to scan. The scan is not recursive.

    Returns:
        list: One ``data['title']`` value per file, in the order returned by
        ``os.listdir``. Duplicate titles are kept.

    Raises:
        OSError: If *path* cannot be listed or a file cannot be opened.
        ValueError: If a file is not valid UTF-8 encoded JSON.
        KeyError: If a decoded JSON object has no ``'title'`` key.
    """
    titles = []
    for filename in os.listdir(path):
        disease_path = os.path.join(path, filename)
        # A sub-directory cannot be opened as a JSON document; skip it
        # instead of letting open() abort the whole scan.
        if not os.path.isfile(disease_path):
            continue
        with open(disease_path, mode='r', encoding='utf-8') as f:
            data = json.load(f)
        titles.append(data['title'])
    return titles


def count_item(l):
    """Print every element of *l* that occurs more than once.

    A duplicated element is printed once per occurrence, in list order, so a
    value that appears three times is printed three times.

    Args:
        l: Sequence of items to inspect.
    """
    from collections import Counter

    try:
        # Tally once up front instead of rescanning the list for each item.
        occurrences = Counter(l)
    except TypeError:
        # Unhashable items cannot be tallied in a Counter; fall back to the
        # quadratic list.count scan so such inputs still work.
        duplicated = [item for item in l if l.count(item) > 1]
    else:
        duplicated = [item for item in l if occurrences[item] > 1]

    for item in duplicated:
        print(item)


def _write_names(filename, names):
    """Write each disease name in *names* on its own line of *filename*.

    The file is opened in ``'w'`` mode so that re-running the script replaces
    the previous output instead of appending a second copy, and the names are
    sorted so the file content does not depend on set iteration order.
    """
    with open(filename, encoding='utf-8', mode='w') as f:
        f.writelines(disease_name + '\n' for disease_name in sorted(names))


if __name__ == '__main__':
    json_path1 = 'D:/PycharmProjects/three_source_json_data/39/json_data/'
    json_path2 = 'D:/PycharmProjects/three_source_json_data/pingan/json_data/'
    json_path3 = 'D:/PycharmProjects/three_source_json_data/xywy/json_data/'

    # The trailing numbers are the counts recorded by the original author.
    list1 = get_disease(json_path1)  # 7784
    list2 = get_disease(json_path2)  # 6746
    list3 = get_disease(json_path3)  # 6544

    # Duplicate-title checks, left disabled by the original author:
    # print(len(list1), len(set(list1)))
    # print(len(list2), len(set(list2)))
    # print(len(list3), len(set(list3)))
    # count_item(list1)
    # count_item(list3)

    # Build each set once; every group below is derived from these three.
    set_39 = set(list1)
    set_pingan = set(list2)
    set_xywy = set(list3)

    # Present in all three sources.
    _39_pingan_xywy = set_39 & set_pingan & set_xywy
    # Output disabled in the original script (recorded size: 3170):
    # print(len(_39_pingan_xywy))
    # _write_names('39_pingan_xywy.txt', _39_pingan_xywy)

    # Present in 39 and pingan, but not in xywy.
    _39_pingan = (set_39 & set_pingan) - set_xywy
    print(len(_39_pingan))  # 1423
    # _write_names('39_pingan.txt', _39_pingan)

    # Present in 39 and xywy, but not in pingan.
    _39_xywy = (set_39 & set_xywy) - set_pingan
    print(len(_39_xywy))  # 1951
    # _write_names('39_xywy.txt', _39_xywy)

    # Present in pingan and xywy, but not in 39.
    pingan_xywy = (set_pingan & set_xywy) - set_39
    print(len(pingan_xywy))  # 389
    # NOTE(review): the disabled code spelled this file 'piangan_xywy.txt'
    # while the module docstring says 'pingan_xywy.txt' - confirm which name
    # exists on disk before re-enabling.
    # _write_names('piangan_xywy.txt', pingan_xywy)

    # Present only in 39.
    _39 = set_39 - set_pingan - set_xywy
    print(len(_39))  # 1240
    _write_names('_39.txt', _39)

    # Present only in pingan.
    pingan = set_pingan - set_39 - set_xywy
    print(len(pingan))  # 1764
    _write_names('pingan.txt', pingan)

    # Present only in xywy.
    xywy = set_xywy - set_39 - set_pingan
    print(len(xywy))  # 1034
    _write_names('xywy.txt', xywy)

    # Extra size checks, left disabled by the original author:
    # print(len(set_39 & set_pingan))
    # print(len(set_39 & set_xywy))
    # print(len(set_pingan & set_xywy))
    # print(len(set_39 | set_pingan | set_xywy))
